Taxa bar plot - core taxa only
Packages
pacman::p_load(
dplyr,
tidyverse,
readxl,
vegan,
reshape2,
RColorBrewer,
data.table,
grid,
ggh4x,
ggsci)
# setwd("D:PhD/01_Data/03_Vitro/02_Identification of glycan utilizing microbe") # Windows
setwd("/Volumes/Yiming_Wang/PhD/01_Data/03_Vitro/02_Identification of glycan utilizing microbe") # Mac
df_all <- read_excel("Glycan utilization_L6_taxa formatted.xlsx") %>%
mutate(Genotype=factor(Genotype,levels = c("WT","KO"))) %>%
mutate(Sex = factor(Sex, levels = c("Male", "Female"))) %>%
mutate(Genotype_Sex = factor(Genotype_Sex,levels = c("WT Male","WT Female","KO Male","KO Female"))) %>%
filter(Genotype == "WT") %>%
mutate(Sample = row_number()) %>%
mutate(Treatment = ifelse(Treatment == "No glycan", "mBasal", "mBasal + 2'-FL")) %>%
mutate(Treatment_Sex= paste0(Treatment,"_", Sex)) %>%
relocate(Sample, Treatment_Sex)
# Change 1 to 01
df_all$Sample <- sprintf("%02d", as.numeric(df_all$Sample))
df_all<- df_all %>%
mutate(Sample = factor(paste0("Sample", " ",df_all$Sample)))
Shape data
core taxa
df_presence <- df_all %>%
gather(Taxa, Abundance, -c(1:15)) %>%
select(1,2,3,4,5,6,7,8,16,17) %>%
filter(Abundance != 0) %>% # save the taxa of which abundance is not equal to 0
group_by(Taxa) %>%
summarise(n=n_distinct(OTU_ID)) %>% #get number of samples that have non-zero abundance bacteria as grouped by bacteria#
mutate(n.sample=nrow(df_all)) %>%
mutate(p=n/n.sample)
df_presence_core <- df_presence %>%
filter (p > 0.95)
df_abundance <- df_all %>%
gather(Taxa, Abundance, -c(1:15)) %>% # transform to long table
select(1,3,4,6,9,10,16,17) %>%
group_by(Taxa) %>%
summarise(mean_abundance=mean(Abundance,na.rm=T),
median_abundance = median(Abundance,na.rm=T))
df_abundance_core <- df_abundance %>%
filter(mean_abundance>0.0001)
df_all_core <- left_join(df_presence_core, df_abundance_core, by="Taxa")
df_for_figure <- df_all %>%
gather(Taxa, Abundance, -c(1:15)) %>%
mutate(Taxa = ifelse(Taxa %in% df_all_core$Taxa, Taxa, "Non-core taxa")) %>%
group_by(Sample, OTU_ID, Genotype, Sex, Genotype_Sex, Treatment, Taxa) %>%
summarise(Abundance = sum(Abundance)) %>%
spread(Taxa, Abundance)
shape data frame for core taxa
#Create dataset with just headings and all taxa data
all_taxa1 <- df_for_figure %>% subset(select = c(1,7:ncol(df_for_figure)))
#Create dataset with just headings and all metadata
all_taxa1_names <- df_for_figure[,1:6]
#"Melt" data so it becomes a long list of each cell
all_taxa2<- melt(all_taxa1, id = c("Sample"))
all_taxa2$Sample <- factor(all_taxa2$Sample, levels=unique(all_taxa2$Sample)) #factor the ID column
#Add metadata to melt
all_taxa2_names <- left_join(all_taxa2, all_taxa1_names, by="Sample")
Figure
#Generate bar plot with separate lengend
# Choose the most abundant taxa as the first bar and decedent the samples
order<-all_taxa2_names%>%
filter(variable == "Escherichia Shigella")%>%
arrange(desc(value))
# Factor your oder
order$Sample<-factor(order$Sample,levels=unique(order$Sample))
# Apply the factor level to your original data
all_taxa2_names$Sample<-factor(all_taxa2_names$Sample,levels = order$Sample)
# Define color
mypal = pal_simpsons("springfield", alpha = 0.7)(7)
mypal
## [1] "#FED439B2" "#709AE1B2" "#8A9197B2" "#D2AF81B2" "#FD7446B2" "#D5E4A2B2"
## [7] "#197EC0B2"
library("scales")
show_col(mypal)

set.seed(860) #860
mypal<-sample(mypal)
show_col(mypal)

# Draw bar plot
all_bar <- ggplot(all_taxa2_names, aes(x = Sample, fill = reorder(variable,-value), y = value))+ #reorder taxa based on their relative abundance
geom_bar(position="fill", stat="identity", linetype = 1, width = 0.7, colour = 'black',size = 0.3)+ #can replace colour = "black" with linetype = 0 for no outline
theme(axis.title = element_text(face="plain", size = 10),
axis.title.x=element_text(size =10),
axis.title.y=element_text(size =20, margin = margin(r = 5)),
axis.text.x=element_blank(),
axis.text.y=element_text(colour="black", face="plain", size=15),
axis.ticks.x=element_blank(),
plot.title = element_text(size=10, hjust=0.5, face="plain"),
legend.position = "none",
plot.background = element_rect(fill="transparent", colour = "transparent"))+
guides(fill=guide_legend(ncol=1))+
scale_y_continuous(expand = c(0,0),labels=scales::percent) +
labs(x = "", y = "Relative Abundance (%)", fill = "OTU")+
scale_fill_manual(values = c(mypal)) +
facet_grid(cols = vars(Treatment),scales = "free") + theme(strip.text = element_text(colour = 'white', size = rel(2)))
all_bar

#Just plot the legend
## Three columns
all_bar_legend_set <- ggplot(all_taxa2_names, aes(x = Sample, fill = reorder(variable,-value), y = value))+
geom_bar(position="fill", stat="identity", linetype = 1, width = 0.8, color = "black", size = 0.3)+
theme(legend.title = element_text(size = 8, face = "bold"), legend.text = element_text(size = 6, face = "bold", colour = "black"), legend.key.size = unit(0.2, "cm"))+
scale_fill_manual(values = c(mypal))+
guides(fill=guide_legend(ncol=3))
all_bar_legend <- cowplot::get_legend(all_bar_legend_set)
grid.newpage()
grid.draw(all_bar_legend)
